#-----------------------------------#
# Replication of:
# Seeking Opportunity in the Knowledge Economy: Moving Places, Moving Politics?
# Valentina Consiglio, Thomas Kurer

# File: 4_plots

# Note: This file prepares the data for and creates the following figures
# Figure 4a: Socio-Demgraphic Characteristics of Movers – Moving Probability
# Figure 4b: Socio-Demgraphic Characteristics of Movers – Moving Direction

#-----------------------------------#

#-------------------------------------------------------------------------------# 

# Clear workspace
rm(list=ls())

#### Load Packages ####
library(pacman)
p_load(car, arm, lmtest, vars, ggplot2, psych, stargazer, dplyr, readxl, tidyverse, 
       haven, labelled, gtsummary, survey, srvyr, BMisc, questionr,
       scales, lubridate, gridExtra, RColorBrewer, forcats, ggthemes)

#### Define relevant paths ####
tables_path <- "tables/"
graph_path <- "figures/"
data_path <- "data_created/" 

#### Set run mode ####
data_room <- FALSE  
# Set TRUE for replication in the data room; code will run through fully; 
# if FALSE, only plots will be generated based on csv files in replication folder.

#-------------------------------------------------------------------------------# 
#### Import data #### 
if (data_room) {

load("data_created/df_2009_soepoi_v4.RData")
  
soepoi_v4 <- df_2009

# Rename for plots
var_label(soepoi_v4)
var_label(soepoi_v4) <- list(
  emplst_d = "Employment status",
  emplst_d_lag1 = "Previous employment status",
  income_grp_clean = "Income group (w/o students)"
)

# Sample selection for descriptive statistics #

## Only realized interviews, 18+ and years 2011-2020

val_labels(soepoi_v4$netto)
ds <- subset(soepoi_v4, netto >= 10 & netto <= 18 & age >= 18 & age <= 70 & syear >= 2010)

# Rename opmove_ub --> opmove for convenience and drop opmove (balanced lag) which is not used in this case
ds <- ds %>%
  select(-c("opmove"))

ds <-  ds %>%
  rename(opmove = opmove_ub)

#-------------------------------------------------------------------------------# 
#### 1 | Share of movers by socio-economic characteristics ####
#-------------------------------------------------------------------------------# 

#-------------------------------------------------------------------------------# 
##### 1.1 | Data ####
#-------------------------------------------------------------------------------# 

# Select relevant variables for analyses with moved_krs
# re-construct weightsusing average
ds$phrf1_11 <- ds$phrf1/11

ds_svy3 <- ds %>% 
  select(c("syear", "pid", "phrf1_11","moved_krs", "opmove",
           
           "sex", "age_grp", "region_lag1", "educ", "acfbg", "income_grp_clean", "hhtype", "marstatag", "childd",
           
           "emplst_d", "emplst_d_lag1",  "egpag_ext", "egpag_ext_lag1",
           
           "oiqtl_wn_lag1")) 



# Recode as factor so that haven_labels are used for sumstats in tbl
ds_svy_fct3 <- haven::as_factor(ds_svy3)

# Set survey design
ds_svy_design3 <- survey::svydesign(data = ds_svy_fct3, ids = ~pid, weights = ~phrf1_11)
ds$moved_krs

for (i in 6:19){
  
  ratio <- svyby(formula = ~moved_krs,
                 by = ds_svy_design3[[c(7, i)]],
                 design = ds_svy_design3,
                 FUN = svymean, 
                 na.rm = TRUE)%>%
    mutate(lower_95 = moved_krsMoved - (2*se.moved_krsMoved), # add errors
           upper_95 = moved_krsMoved + (2*se.moved_krsMoved)) %>%
    select(-c(moved_krsStayed, se.moved_krsStayed)) 
  
  
  count <- svyby(formula = ~moved_krs,
                 by = ds_svy_design3[[c(7, i)]],
                 design = ds_svy_design3,
                 FUN = unwtd.count, 
                 na.rm = TRUE) %>%
    select(-c(se))
  
  joint <- left_join(ratio, count, by = "by")
  assign(paste0("out_moved_krs_",i), joint)
}

# Bind
# 6 "sex", 7 "age_grp", 8 "region_lag1", 9 "educ", 10 "acfbg", 11 "income_grp_clean", 12 "hhtype", 13 "marstatag", 14 "childd",
# 15 "emplst_d", 16 "emplst_d_lag1",  17 "egpag_ext", 18 "egpag_ext_lag1",
# 19 "oiqtl_wn_lag1"

# All vars
tab_movers_krs <- bind_rows(out_moved_krs_6, out_moved_krs_7, out_moved_krs_8, out_moved_krs_9, out_moved_krs_10, out_moved_krs_11, out_moved_krs_12,
                            out_moved_krs_13, out_moved_krs_14, out_moved_krs_15, out_moved_krs_16, out_moved_krs_17,
                            out_moved_krs_18, out_moved_krs_19)
tab_movers_krs

# Lagged vars
tab_movers_krs_lags <- bind_rows(out_moved_krs_8, out_moved_krs_16, out_moved_krs_18, out_moved_krs_19)
tab_movers_krs_lags


# Combined vars (w/o non-lags)
tab_movers_krs_comb <- bind_rows(out_moved_krs_6, out_moved_krs_7, out_moved_krs_8, out_moved_krs_9, out_moved_krs_10, out_moved_krs_11, out_moved_krs_12,
                                 out_moved_krs_13, out_moved_krs_14, out_moved_krs_16,
                                 out_moved_krs_18, out_moved_krs_19)
tab_movers_krs_comb


# Save aggregated table output
write.csv(tab_movers_krs_comb, (paste0(data_path,"4_plots_movers_bysociodemographics.csv")), row.names = FALSE)

}

#-------------------------------------------------------------------------------# 
##### 1.2 | Plot ####
#-------------------------------------------------------------------------------# 

tab_movers_comb <- read.csv(file = "data_created/4_plots_movers_bysociodemographics.csv")

# Delete some groups
df_movers_comb <- tab_movers_comb %>%
  slice(-c(9:13, 29:34, 39:53))

# Bar plot

gg_movers_bygroup_bar <- ggplot(df_movers_comb) +
  geom_bar( aes(x=by, y=moved_krsMoved), stat="identity", alpha=0.5) +
  geom_errorbar(aes(x=by, y=moved_krsMoved, ymin=lower_95, ymax=upper_95), colour = "gray28") +
  coord_flip() + 
  labs(x = element_blank(),
       y = element_blank()) +
  scale_x_discrete(limits=df_movers_comb$by ) +  # sort categories
  theme(plot.margin = margin(t = 42, b = 21),
        panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        axis.text = element_text(color = "black", size = 12),
        legend.key.size = unit(0.8, 'cm'),
        axis.title.y = element_text(margin = margin(t = 5, r = 0, b = 0, l = 0)),
        axis.text.y.right = element_text(margin = unit(c(1, 1, 1, 1), "mm"))) +
  scale_fill_economist() 

gg_movers_bygroup_bar

ggsave("figures/Figure_4a.png",
       plot = gg_movers_bygroup_bar, 
       dpi = 600,
       width = 7, height = 5)


#-------------------------------------------------------------------------------# 
#### 2 | Share of up- and down-movers by socio-economic characteristics  ####
#-------------------------------------------------------------------------------# 

#-------------------------------------------------------------------------------# 
##### 2.1 | Data ####
#-------------------------------------------------------------------------------# 

# A Kreis-based moving definition means that we only consider those as movers who move across kreis.
# Thereby, only an up or a down-move among movers is possible. 

# Select relevant variables for analyses with moved_krs
if (data_room) {
ds_svy4 <- ds %>% 
  select(c("syear", "pid", "phrf1_11","moved_krs", "opmove",
           
           "sex", "age_grp", "region_lag1", "educ", "acfbg", "income_grp_clean", "hhtype", "marstatag", "childd",
           
           "emplst_d", "emplst_d_lag1",  "egpag_ext", "egpag_ext_lag1",
           
           "oiqtl_wn_lag1")) %>%
  filter(moved_krs == 1)


# Recode as factor so that haven_labels are used for sumstats in tbl
ds_svy_fct4 <- haven::as_factor(ds_svy4)

# Set survey design
ds_svy_design4 <- survey::svydesign(data = ds_svy_fct4, ids = ~pid, weights = ~phrf1_11)
ds$moved_krs

# Loop over all variables in selected data set #

for (i in 6:19){
  
  ratio <- svyby(formula = ~opmove,
                 by = ds_svy_design4[[c(7, i)]],
                 design = ds_svy_design4,
                 FUN = svymean, 
                 na.rm = TRUE) %>%
    mutate(lower_95_up = opmoveUp - (2*se.opmoveUp), # add errors
           upper_95_up = opmoveUp + (2*se.opmoveUp),
           lower_95_down = opmoveDown - (2*se.opmoveDown), # add errors
           upper_95_down = opmoveUp + (2*se.opmoveDown)) %>%
    select(-c(opmoveStable, se.opmoveStable))
  
  
  count <- svyby(formula = ~opmove,
                 by = ds_svy_design4[[c(7, i)]],
                 design = ds_svy_design4,
                 FUN = unwtd.count, 
                 na.rm = TRUE) %>%
    select(-c(se))
  
  joint <- left_join(ratio, count, by = "by")
  assign(paste0("out_opmove_krs_",i), joint)

}

# Bind
# 6 - sex, 7 - age_grp, 8 - region_3p, 9 - educ, 10 - acfbg, 11 - income_grp_clean, 12 - hhtype, 
# 13 - marstatag, 14 - childd, 15 - emplst_d, 16 - emplst_d_3p, 17 - egpag_ext, 18 - egpag_ext_3p
# 19 - oiqtl_wn_3p

# All vars
tab_opmove_krs <- bind_rows(out_opmove_krs_6, out_opmove_krs_7, out_opmove_krs_8, out_opmove_krs_9, 
                            out_opmove_krs_10, out_opmove_krs_11, out_opmove_krs_12,
                            out_opmove_krs_13, out_opmove_krs_14, out_opmove_krs_15, out_opmove_krs_16, 
                            out_opmove_krs_17, out_opmove_krs_18, out_opmove_krs_19)
tab_opmove_krs

# Lagged vars
tab_opmove_lags_krs <- bind_rows(out_opmove_krs_8, out_opmove_krs_16, out_opmove_krs_18, out_opmove_krs_19)
tab_opmove_lags_krs

# Combined vars (w/o non-lags)
tab_opmove_comb_krs <- bind_rows(out_opmove_krs_6, out_opmove_krs_7, out_opmove_krs_8, out_opmove_krs_9, 
                                 out_opmove_krs_10, out_opmove_krs_11, out_opmove_krs_12,
                                 out_opmove_krs_13, out_opmove_krs_14, out_opmove_krs_16,
                                 out_opmove_krs_18, out_opmove_krs_19)
tab_opmove_comb_krs


# Save aggregated table output
write.csv(tab_opmove_comb_krs, (paste0(data_path, "4_plots_updownmovers_bysociodemographics.csv")), row.names = FALSE)
}
#-------------------------------------------------------------------------------# 
##### 2.2 | Plot ####
#-------------------------------------------------------------------------------# 

# Load data
tab_opmove_krs_comb <- read.csv(file = "data_created/4_plots_updownmovers_bysociodemographics.csv")

# Select columns and rows
df_opmove_krs <- tab_opmove_krs_comb %>%
  slice(-c(9:13, 29:34, 39:53)) # delete not-relevant categories or those with too little observations

# create new vars to scale to per cent
df_opmove_krs <- df_opmove_krs %>%
  mutate(Up = (opmoveUp/(opmoveUp+opmoveDown)*100), 
         Down = (opmoveDown/(opmoveUp+opmoveDown)*100)) 

# Select columns and rows
df_opmove_krs <- df_opmove_krs %>%
  select(by, Up, Down) 
df_opmove_krs

# Reshape for stacked graph
df_opmove_krs_reshape <- df_opmove_krs %>%
  gather("group", "share", 2:3)

# Plot stacked graph with value labels

gg_stack_opmove_krs_bygroup_numlabs <- ggplot(df_opmove_krs_reshape, aes(x = share, 
                                                                         y = by,
                                                                         fill = group)) +
  aes(y = fct_inorder(by)) + 
  geom_bar(position = "stack", stat = "identity") + 
  labs(x = "(in %)", y = element_blank()) +
  geom_text(aes(label = round(share, digits = 0)),
            position = position_stack(vjust = .5),
            size = 3,
            colour = "white") +
  theme_economist_white(gray_bg = FALSE) +
  theme(axis.title.y = element_text(margin = margin(t = 0, r = 5, b = 0, l = 0)),
        axis.text.y = element_text(size = 12),
        legend.title = element_blank(),  
        legend.text = element_text(size = 10),
        legend.key.size = unit(0.3, 'cm'),
        axis.text.y.right = element_text(margin = unit(c(1, 1, 1, 1), "mm"))) +
  scale_fill_manual(values = c(
    "Up" = "gray67",
    "Down" = "gray28"
  )) +
  scale_y_discrete(position = "right")
gg_stack_opmove_krs_bygroup_numlabs

ggsave("figures/Figure_4b.png",
       plot = gg_stack_opmove_krs_bygroup_numlabs, 
       dpi = 600,
       width = 7, height = 5)


#-------------------------------------------------------------------------------# 
#-------------------------------------------------------------------------------# 

